
/* This script creates the basic data structures for all objects used by the model. */

/* Create the [population] database on first run, then switch to it.
   USE is resolved when a batch is compiled as well as when it executes, so
   CREATE DATABASE must complete in an earlier batch than USE [population];
   otherwise a first run can fail with error 911 (database does not exist). */
USE [master];

IF NOT EXISTS (SELECT * FROM sys.databases WHERE name = N'population')
    BEGIN
        CREATE DATABASE [population];
    END;

go

USE [population];


/* PRIMARY DATA TABLES
  WHERE INFO ABOUT THE POPULATION IS STORED.   
  THESE TABLES COULD BE SELECTED FROM TO GENERATE NEW DATA REPORTS. */

/* The main table in which generated people are recorded, one row per person.
   Qualified with dbo so the table is created in the schema that the
   [dbo].[person] index definitions in this script refer to, regardless of
   the default schema of the login running the script. */
CREATE TABLE dbo.person
  (
     person_id      INT NOT NULL,    -- uniquely identifies a person.
     parents_id     INT NOT NULL,    -- links to the parents' marriage record.
     sex            BIT NOT NULL,    -- 0 for female, 1 for male.
     year_of_birth  INT NOT NULL,    -- the year the person was born.
     surv_pen       BIGINT NOT NULL, -- higher values make a person more likely to die in childhood.
     success_pen    BIGINT NOT NULL, -- higher values reduce success in adulthood.
     attr_pen       BIGINT NOT NULL, -- higher values reduce success in finding a mate.
     inher_pen      BIGINT NOT NULL, -- higher values reduce success in securing an inheritance.
     mutation_count INT NOT NULL,    -- total number of deleterious mutations in this person's genome, including non-expressed recessive genes.
     lineage_id     INT NOT NULL,    -- uniquely identifies the family lineage of the person.
     inheritor      BIT,             -- 1 if this person stands to inherit the family lineage, 0 if not; NULL if the family doesn't track lineages.
     line_gen       INT NOT NULL     -- generations since the lineage was established. Members of the same line_gen and lineage compete for inheritance.
  );

/* Data about the genome and all possible mutations, one row per locus.
   Randomly generated for each simulation.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[genome] clustered index in this script refers to. */
CREATE TABLE dbo.genome
  (
     mutation_id          SMALLINT NOT NULL, -- the locus of the mutation.
     base_surv_pen        INT NOT NULL,      -- child-survival penalty of this mutation before accounting for variants and codeleteriousness.
     base_attr_pen        INT NOT NULL,      -- attractiveness penalty of this mutation before accounting for variants and codeleteriousness.
     base_inher_pen       INT NOT NULL,      -- inheritance (endearment) penalty of this mutation before accounting for variants and codeleteriousness.
     base_success_pen     INT NOT NULL,      -- adult-success penalty of this mutation before accounting for codeleteriousness.
     dominance_type       TINYINT NOT NULL,  -- 0: deleterious variant is dominant. 1: deleterious variant is recessive. 2: heterozygous is beneficial and homozygous is deleterious.
     variance             SMALLINT NOT NULL, -- how much the particular variant of the mutation affects its deleteriousness.
     heterozygous_benefit INT,               -- if the mutation is beneficial in heterozygous form, it is beneficial by this much.
     heterozygous_divisor INT                -- if the mutation is weaker in heterozygous form, it is this much weaker.
  );

/* Data about all genes for all people is stored here.
   If a person has no mutation at a locus, no row is stored for that locus.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[mutation] clustered index in this script refers to. */
CREATE TABLE dbo.mutation
  (
     person_id       INT NOT NULL,      -- links to the person who has this gene.
     mutation_id     SMALLINT NOT NULL, -- the locus of the mutation.
     mothers_variant TINYINT NOT NULL,  -- the variant of the gene inherited from the person's mother.
     fathers_variant TINYINT NOT NULL   -- the variant of the gene inherited from the person's father.
  );

/* Stores data about married couples that produce children, one row per couple.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[married_pair] clustered index in this script refers to. */
CREATE TABLE dbo.married_pair
  (
     pair_id          INT NOT NULL,               -- uniquely identifies a couple.
     female_id        INT NOT NULL,               -- the woman of the couple.
     male_id          INT NOT NULL,               -- the man of the couple.
     children         TINYINT NOT NULL DEFAULT 0, -- total number of children the couple has, including children who have died.
     desired_children TINYINT NOT NULL,           -- total number of children the couple plans to have.
     lineage_id       INT NOT NULL,               -- uniquely identifies the family lineage of the inheriting member of the couple.
     line_gen         INT NOT NULL,               -- total number of generations since this family lineage was established.
     track_lineages   BIT NOT NULL                -- 1 if this family maintains a lineage with designated inheritors/successors, 0 if it doesn't.
  );

/* Tracks births and deaths for the purposes of calculating child mortality
   and fertility variance.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[statistics_table] clustered index in this script refers to. */
CREATE TABLE dbo.statistics_table
  (
     births             INT,            -- number of people who were born in the most recent two-year period.
     deaths             INT NOT NULL,   -- number of people who died in the most recent two-year period.
     current_pop        INT NOT NULL,   -- the total population, not counting recent births but accounting for recent deaths.
     current_year       INT NOT NULL,   -- the year these statistics were recorded.
     fertility_variance DECIMAL(10, 4), -- how much a family's size differs from 2, on average.
     child_mortality    DECIMAL(10, 4)  -- percentage chance of someone dying before age 16.
  );



/* SETTINGS TABLES. 
  THESE TABLES ARE EXTENSIONS OF THE MODEL'S SETTINGS.
  THE MAIN SETTINGS CAN BE FOUND AT THE TOP OF THE MODEL.sql SCRIPT. */

/* Actuarial table: for each age, how likely a person is to die, become
   infertile, or otherwise become unable to have further children. */
CREATE TABLE death_table (
    -- one of the possible even-numbered ages, in years.
    age        TINYINT  NOT NULL,
    -- how likely anyone is to "die" over the given two-year period.
    death_rate SMALLINT NOT NULL
);

/* Records the increased likelihood of mutations occurring in newborns
   according to the age and sex of each parent.
   Older parents are more likely to pass on new mutations to their children. */
CREATE TABLE parental_age_mutations (
    -- the age of the parent.
    age                     TINYINT       NOT NULL,
    -- the sex of the parent.
    sex                     BIT           NOT NULL,
    -- increased chance of passing mutations based on this age and sex.
    increased_mutation_rate DECIMAL(4, 2) NOT NULL
);

/* Holds the year in which a simulation should stop.
   The value can be updated while the simulation is running to stop it early. */
CREATE TABLE stop_year (
    stop_year INT NOT NULL
);

-- Seed the table with an initial stop year of 0.
INSERT INTO stop_year (stop_year)
VALUES (0);


/* STAGING AND PERFORMANCE TABLES
  THE FOLLOWING TABLES ARE USED FOR TECHNICAL MANIPULATION OF DATA. 
  UNLESS YOU HAVE A TECHNICAL INTEREST IN HOW THE MODEL WORKS, YOU DON'T NEED TO EVER EXAMINE THEM. */

/* Redundant data about each couple's mutations, stored for performance reasons.
   Each row carries, for one couple and one locus, the variant columns of both
   the female and the male partner.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[married_pair_mutation] clustered index in this script refers to. */
CREATE TABLE dbo.married_pair_mutation
  (
     pair_id                 INT NOT NULL,      -- the couple (presumably married_pair.pair_id; confirm against the model script).
     mutation_id             SMALLINT NOT NULL, -- the locus of the mutation.
     females_mothers_variant TINYINT,
     females_fathers_variant TINYINT,
     males_mothers_variant   TINYINT,
     males_fathers_variant   TINYINT
  );

/* Staging table: holds the people who have "died" this year so they can be
   processed further. */
CREATE TABLE staging_dead_person (
    person_id INT NOT NULL
);

/* A staging table that holds dissolved unions for further processing.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[staging_done_married_pair] clustered index in this script refers to. */
CREATE TABLE dbo.staging_done_married_pair
  (
     pair_id  INT, -- the dissolved couple.
     widow_id INT  -- NOTE(review): presumably the surviving partner's person_id; confirm against the model script.
  );

/* A staging table that holds new births for further processing.
   person_id is an IDENTITY column, so the server assigns it on insert.
   Columns that share a name with a person column presumably carry the same
   meaning; new_mut_count and mut_pen exist only here (confirm their use
   against the model script).
   Qualified with dbo so the table lands in the schema that the
   [dbo].[staging_person] clustered index in this script refers to. */
CREATE TABLE dbo.staging_person
  (
     person_id      INT IDENTITY NOT NULL,
     parents_id     INT NOT NULL,
     surv_pen       BIGINT,
     success_pen    BIGINT,
     attr_pen       BIGINT,
     inher_pen      BIGINT,
     new_mut_count  INT,
     lineage_id     INT,
     mut_pen        INT,
     mutation_count INT,
     line_gen       INT,
     inheritor      BIT
  );

/* Staging table: the women who are candidates in a matchmaking pool. */
CREATE TABLE staging_bride (
    person_id  INT,
    ranked     INT,
    lineage_id INT,
    line_gen   INT
);

/* Staging table: the men who are candidates in a matchmaking pool. */
CREATE TABLE staging_groom (
    person_id  INT,
    ranked     INT,
    lineage_id INT,
    line_gen   INT
);

/* A staging table that holds provisional marriages.
   pair_id is an IDENTITY column, so the server assigns it on insert.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[staging_married_pair] clustered index in this script refers to. */
CREATE TABLE dbo.staging_married_pair
  (
     pair_id        INT IDENTITY NOT NULL,
     female_id      INT NOT NULL,
     male_id        INT NOT NULL,
     lineage_id     INT,
     track_lineages BIT NOT NULL,
     inheritor_sex  BIT, -- NOTE(review): presumably uses the person.sex encoding (0 female, 1 male); confirm against the model script.
     line_gen       INT
  );
  
/* Temporarily holds mutation data while the mutation table is being cleared
   of excess data. It has the same columns as the mutation table.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[staging_mutation] clustered index in this script refers to. */
CREATE TABLE dbo.staging_mutation
  (
     person_id       INT NOT NULL,      -- links to the person who has this gene.
     mutation_id     SMALLINT NOT NULL, -- the locus of the mutation.
     mothers_variant TINYINT NOT NULL,  -- the variant of the gene inherited from the person's mother.
     fathers_variant TINYINT NOT NULL   -- the variant of the gene inherited from the person's father.
  );
  
/* Temporarily holds per-couple mutation data; it has the same columns as
   married_pair_mutation. Presumably used while married_pair_mutation is being
   cleared of excess data, as staging_mutation is for mutation (confirm
   against the model script).
   Qualified with dbo so the table lands in the schema that the
   [dbo].[staging_married_pair_mutation] clustered index in this script refers to. */
CREATE TABLE dbo.staging_married_pair_mutation
  (
     pair_id                 INT NOT NULL,
     mutation_id             SMALLINT NOT NULL,
     females_mothers_variant TINYINT,
     females_fathers_variant TINYINT,
     males_mothers_variant   TINYINT,
     males_fathers_variant   TINYINT
  );


/* A table that contains the integers from 1 to 200.
   It is used to simplify the algorithm when generating new mutations.
   Qualified with dbo so the table lands in the schema that the
   [dbo].[small_integers] clustered index in this script refers to. */
CREATE TABLE dbo.small_integers
  (
     n INT NOT NULL
  );

/* Populate 1..200 with a single set-based INSERT instead of a 200-iteration
   loop of single-row inserts. Ten digits are cross joined as units, tens and
   hundreds; the hundreds digit is limited to 0 and 1, which yields exactly
   the values 1 through 200. */
WITH digit (d) AS
  (
     SELECT d
     FROM   (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (d)
  )
INSERT INTO dbo.small_integers
            (n)
SELECT hundreds.d * 100 + tens.d * 10 + units.d + 1
FROM   digit AS units
       CROSS JOIN digit AS tens
       CROSS JOIN digit AS hundreds
WHERE  hundreds.d < 2;

go

/* PROGRAMMATIC FUNCTIONS AND VIEWS
  Used for technical implementation of the model */

/* Returns a single row whose column ran is 1 when a freshly drawn random
   integer in the range 0-99 equals 1, and 0 otherwise.
   The draw lives in a view because SQL Server does not let a user-defined
   function call NEWID() directly; a function can select from this view instead. */
CREATE VIEW [dbo].[new_random]
AS
  SELECT
    CASE
      WHEN FLOOR(RAND(CHECKSUM(NEWID())) * 100) = 1
        THEN 1
      ELSE 0
    END AS ran;

go

/* Generates the random number of new mutations a new child will receive.
   Reads new_random once per unit of @mut_limit and adds up the 0/1 results.
   @mut_limit: the number of draws to make, and so the largest possible result.
   Returns the sum of the draws; 0 when @mut_limit is NULL, zero or negative,
   because the loop body never runs in those cases. */
CREATE FUNCTION calculate_mut_count (
@mut_limit INT)
RETURNS INT
AS
  BEGIN
      DECLARE @draws_left INT = @mut_limit;
      DECLARE @hits INT = 0;

      -- count down instead of up; the number of iterations is unchanged.
      WHILE @draws_left > 0
        BEGIN
            SET @hits += (SELECT ran
                          FROM   new_random);

            SET @draws_left -= 1;
        END;

      RETURN @hits;
  END;

go  

/* INDEXES
  THE TABLES ARE INDEXED FOR PERFORMANCE REASONS. 
  THESE INDEXES HAVE NO EFFECT ON THE FUNCTIONALITY OF THE MODEL. THEY ONLY AFFECT HOW FAST IT COMPUTES. */

/* Clusters dbo.genome on mutation_id; UNIQUE rejects a second row for the same locus.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_genome]
  ON [dbo].[genome] ([mutation_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.small_integers on n (not declared UNIQUE).
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE CLUSTERED INDEX [clustered_index_small_integers]
  ON [dbo].[small_integers] ([n] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.person on person_id; UNIQUE rejects duplicate person ids.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_person]
  ON [dbo].[person] ([person_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.statistics_table on current_year; UNIQUE allows only one
   statistics row per year.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_statistics_table]
  ON [dbo].[statistics_table] ([current_year] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.married_pair on pair_id; UNIQUE rejects duplicate pair ids.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_married_pair]
  ON [dbo].[married_pair] ([pair_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.married_pair_mutation on (pair_id, mutation_id); UNIQUE allows
   at most one row per couple and locus.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_married_pair_mutation]
  ON [dbo].[married_pair_mutation] ([pair_id] ASC, [mutation_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.staging_married_pair_mutation on (pair_id, mutation_id),
   the same key as the index on married_pair_mutation; UNIQUE allows at most
   one row per couple and locus.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_married_pair_mutation2]
  ON [dbo].[staging_married_pair_mutation] ([pair_id] ASC, [mutation_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.mutation on (person_id, mutation_id); UNIQUE allows at most
   one row per person and locus.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_mutation]
  ON [dbo].[mutation] ([person_id] ASC, [mutation_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.staging_mutation on (person_id, mutation_id), the same key as
   the index on mutation; UNIQUE allows at most one row per person and locus.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_mutation2]
  ON [dbo].[staging_mutation] ([person_id] ASC, [mutation_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.staging_person on its identity column person_id; UNIQUE
   rejects duplicate ids.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_staging_person]
  ON [dbo].[staging_person] ([person_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.staging_married_pair on its identity column pair_id; UNIQUE
   rejects duplicate ids.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE UNIQUE CLUSTERED INDEX [clustered_index_staging_married_pair]
  ON [dbo].[staging_married_pair] ([pair_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Clusters dbo.staging_done_married_pair on pair_id. The index is not UNIQUE
   and pair_id is nullable in this table, so duplicates and NULLs are accepted.
   The filegroup is written [PRIMARY], the filegroup's actual name and the
   spelling already used by the person year_of_birth index in this script,
   so the name also resolves under a case-sensitive collation. */
CREATE CLUSTERED INDEX [clustered_index_staging_done_married_pair]
  ON [dbo].[staging_done_married_pair] ([pair_id] ASC)
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

/* Secondary (nonclustered) index on dbo.person keyed by year_of_birth.
   surv_pen and success_pen are carried as included columns, so a query that
   filters on year_of_birth and reads only those two penalties can be
   answered from this index. */
CREATE NONCLUSTERED INDEX [non_clustered_index_person_year_of_birth]
  ON [dbo].[person] ([year_of_birth] ASC)
  INCLUDE ([surv_pen], [success_pen])
  WITH (
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
  )
  ON [PRIMARY];

go

